#!/usr/bin/python

import os,glob,re,traceback,codecs,os.path

import argparse
# Command-line interface.  Every option has a default, so the script can be
# started without any arguments.
parser = argparse.ArgumentParser(description = """
Create a simple side-by-side display of OCR results.
""")
parser.add_argument('-s','--fontsize',default=10,type=int,
                    help="font size of the table cells, in points (default: %(default)s)")
parser.add_argument('-R','--readme',default="README",
                    help="Markdown file placed at the top of the page if it exists; "
                    "looked up in the directory the script is started from "
                    "(default: %(default)s)")
parser.add_argument('-x','--extensions',default=".txt",
                    help="whitespace-separated list of line file suffixes; "
                    "each suffix gets its own table column (default: %(default)s)")
parser.add_argument('-o','--output',default="index.html",
                    help="name of the HTML file to write; a relative name is "
                    "created inside the book directory (default: %(default)s)")
parser.add_argument('-b','--bookdir',default="book",
                    help="directory containing the pages to display (default: %(default)s)")
parser.add_argument('-N','--maxpages',default=10,type=int,
                    help="maximum number of pages to include (default: %(default)s)")
args = parser.parse_args()

# Suffixes of the text files to display; one table column is produced for
# each of them.
extensions = args.extensions.split()

def format(s):
    """Turn the text of a line file into a fragment that is safe to put in HTML.

    The HTML special characters ``&``, ``<`` and ``>`` are replaced by their
    entities, and every run of whitespace that contains a newline is removed.

    s -- the text to convert.

    Returns the converted text.

    NOTE: the name shadows the ``format`` builtin; it is kept because the
    rest of the script calls the function under this name.
    """
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the other two entities would be escaped a second time.
    s = s.replace('&','&amp;')
    s = s.replace('<','&lt;')
    s = s.replace('>','&gt;')
    # '\s' already matches '\n', so this is the same pattern as the longer
    # '[\s\n]*\n[\s\n]*' spelling.
    s = re.sub(r'\s*\n\s*','',s)
    return s

# The optional README is loaded before the working directory changes, so a
# relative --readme path is resolved against the directory the script was
# started from.  `readme` stays None when the file does not exist.
readme = None
if os.path.exists(args.readme):
    # The context manager closes the file handle as soon as it has been read.
    with codecs.open(args.readme,"r","utf-8") as readme_file:
        readme = readme_file.read()

# From here on every relative path (page images, line files and the output
# file itself) is resolved inside the book directory.
os.chdir(args.bookdir)

# Output file, encoded as UTF-8.  It stays open while the page is written
# and is closed at the end of the script.
stream = codecs.open(args.output,"w","utf-8")

def P(x):
    """Write `x` to the output stream and terminate it with a newline."""
    for piece in (x, "\n"):
        stream.write(piece)

# Everything below writes the HTML page.  The try/finally makes sure the
# output file is closed even when an error interrupts the run.
try:
    # Document head: character set declaration and styling of the table cells.
    P("<head>")
    P('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>')
    P("""<style type='text/css'>
td {
    font-size:%dpt;
    border: solid 2px #808080;
}
</style>"""%args.fontsize)
    P("</head>")
    P("<body>")

    # Page introduction: the README rendered from Markdown when one was
    # found, a generic heading otherwise.
    if readme is not None:
        # The markdown package is only needed in this branch, so it is
        # imported here rather than at the top of the file.
        import markdown
        P(markdown.markdown(readme))
    else:
        P("""
<h1>Image vs Output</h1>
""")

    # Header row: one column for the image plus one per requested suffix.
    P("<table>")
    P("<tr>")
    for t in ["image"]+extensions:
        P("<th>%s</th>"%t)
    P("</tr>")

    # One table row per page.  Pages are the entries of the current directory
    # whose name consists of exactly four digits, in sorted order; the loop
    # stops once args.maxpages of them have been written.
    for i,base in enumerate(sorted(glob.glob("[0-9][0-9][0-9][0-9]"))):
        if i>=args.maxpages: break
        P("<tr>")
        P("<td valign=top style='width:600px'>")
        # Progress report on standard output.
        print(base)
        # Use <base>.nrm.png when it exists, otherwise <base>.bin.png.
        image = None
        for suffix in (".nrm.png",".bin.png"):
            if os.path.exists(base+suffix):
                image = base+suffix
                break
        if image is not None:
            P("<img src='%s' width=100%% />"%image)
        else:
            P("<p>[no image for %s]</p>"%base)
        P("<font size=1>%s</font>"%base)
        P("</td>")
        for ext in extensions:
            P("<td valign=top style='width:600px;padding:3ex'>")
            # The line files of a page live in the directory named after the
            # page; each one contributes one output line to the cell.
            for lname in sorted(glob.glob(base+"/0?????"+ext)):
                try:
                    with codecs.open(lname,"r","utf-8") as line_file:
                        text = line_file.read()
                    P(format(text)+u"<br />")
                except Exception:
                    # Best effort: report the failing file and carry on with
                    # the next one.  Only Exception is caught so that Ctrl-C
                    # still stops the script.
                    traceback.print_exc()
            P("</td>")
        P("</tr>")
    P("</table>")
finally:
    stream.close()
